This code computes the plots and descriptive statistics for the caregiver speech measures (derived from transcripts) used in the manuscript.

Load libraries and set theme

library(tidyverse)
library(Hmisc)
library(GGally)
library(ppcor)
library(gridExtra)
library(psych)

# Scatter plot with a linear-model fit line.
# See https://github.com/ggobi/ggally/issues/139
#
# data, mapping: passed straight through to ggplot().
# ...:           extra arguments forwarded to geom_smooth().
# Returns a ggplot object: semi-transparent black points plus a blue "lm" fit.
my_custom_smooth <- function(data, mapping, ...) {
  base_plot <- ggplot(data = data, mapping = mapping)
  point_layer <- geom_point(alpha = .4, color = I("black"))
  fit_layer <- geom_smooth(method = "lm", color = I("blue"), ...)
  base_plot + point_layer + fit_layer
}

# Use theme_bw() as the default ggplot2 theme; plots that add theme_classic()
# replace it for that plot only.
theme_set(theme_bw())

Read in data

# NOTE about periods of non-tCDS
# gemods refers to designated start/end periods of other-directed speech (ODS); these were marked with gems (@G) following CHAT conventions
# kwalods refers to ODS that was transcribed at the utterance level within a tCDS activity period between caregiver and child (e.g., other-directed speech in the background); this was marked per utterance using CHAT postcodes
## for tokens/min and types/min, we do not include ODS that occurred within a period of tCDS, because durations were captured by activity and not by utterance
## for mlu, we include all ODS across gemods and kwalods


# NOTE about speech == "all"
# "speech" includes two levels: all, spont
# all = refers to all speech by caregivers
# spont = refers to only speech by caregivers that was considered spontaneous rather than recited (e.g., reading book text, singing memorized common songs like itsy bitsy spider); therefore, 'spont' is a subset of 'all'

# freq: token/type counts
# - drops the "...1" column
# - removes utterance-level ODS rows ("kwalods") and keeps only speech == "all"
# - converts activity (explicit level order), id, language and speech to factors
freq <- read_csv("./data_demo_lena_transcripts/freq.csv") %>%
  dplyr::select(-"...1") %>%
  filter(activity != "kwalods", speech == "all") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "gemods")
    ),
    across(c(id, language, speech), factor)
  )


# mlu: mean length of utterance data
# The speech filter runs after the factor conversion, so "spont" remains as an
# (unused) level of `speech` in the result.
mlu <- read_csv("./data_demo_lena_transcripts/mlu.csv") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "ods")
    ),
    across(c(id, language, speech), factor)
  ) %>%
  filter(speech == "all")


# chip: caregiver responsiveness measures
# Only caregivers are included, so there is no speaker column.
# ODS periods are excluded because this measure is about responsiveness to the
# child during periods of tCDS.
chip <- read_csv("./data_demo_lena_transcripts/chip.csv") %>%
  filter(activity != "ods") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac")
    ),
    across(c(id, language), factor)
  )

# inspect the column types and factor levels of freq
str(freq)
## tibble [3,308 × 12] (S3: tbl_df/tbl/data.frame)
##  $ id           : Factor w/ 90 levels "7292","7352",..: 47 47 47 47 50 50 52 52 52 52 ...
##  $ rectime      : num [1:3308] 11923 11923 31360 31360 21499 ...
##  $ activity     : Factor w/ 7 levels "books","play",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ speaker      : chr [1:3308] "CHI" "ADULTS" "CHI" "ADULTS" ...
##  $ tokens       : num [1:3308] 30 151 35 143 58 588 42 286 33 152 ...
##  $ types        : num [1:3308] 17 70 17 65 17 199 19 53 17 59 ...
##  $ segment_num  : num [1:3308] 12 12 15 15 2 2 11 11 5 5 ...
##  $ language     : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ speech       : Factor w/ 1 level "all": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dur_min      : num [1:3308] 3.55 3.55 6.57 6.57 4.71 ...
##  $ tokens_permin: num [1:3308] 8.46 42.57 5.32 21.75 12.31 ...
##  $ types_permin : num [1:3308] 4.79 19.73 2.59 9.89 3.61 ...
# inspect the column types and factor levels of mlu
str(mlu)
## tibble [3,002 × 9] (S3: tbl_df/tbl/data.frame)
##  $ id         : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ activity   : Factor w/ 7 levels "books","play",..: 6 6 5 5 7 7 2 2 6 6 ...
##  $ speaker    : chr [1:3002] "ADULTS" "CHI" "ADULTS" "CHI" ...
##  $ segment_num: num [1:3002] 2 2 2 2 2 2 2 2 3 3 ...
##  $ words_sum  : num [1:3002] 210 66 175 43 11 16 189 47 261 78 ...
##  $ num_utt_sum: num [1:3002] 66 35 64 24 2 12 64 28 87 43 ...
##  $ mlu_w      : num [1:3002] 3.18 1.89 2.73 1.79 5.5 ...
##  $ language   : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ speech     : Factor w/ 2 levels "all","spont": 1 1 1 1 1 1 1 1 1 1 ...
# inspect the column types and factor levels of chip
str(chip)
## tibble [1,118 × 11] (S3: tbl_df/tbl/data.frame)
##  $ activity                         : Factor w/ 6 levels "books","play",..: 6 5 2 6 5 4 6 5 4 2 ...
##  $ id                               : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ rectime                          : num [1:1118] 15242 15242 15242 14342 14342 ...
##  $ total_adult_utt                  : num [1:1118] 68 64 65 91 43 13 50 8 65 127 ...
##  $ total_child_utt                  : num [1:1118] 46 34 33 54 17 3 14 1 29 49 ...
##  $ total_adult_resp                 : num [1:1118] 62 51 54 77 24 9 30 4 56 106 ...
##  $ total_adult_imitexp              : num [1:1118] 18 13 15 25 5 2 9 0 16 21 ...
##  $ prop_adultresp_outof_childutt    : num [1:1118] 1.35 1.5 1.64 1.43 1.41 ...
##  $ prop_adult_imitexp_outof_childutt: num [1:1118] 0.391 0.382 0.455 0.463 0.294 ...
##  $ language                         : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ segment_num                      : num [1:1118] 2 2 2 3 3 3 4 4 4 5 ...

Create dfs for ADULTS

# FREQ: keep only the rows whose speaker is coded "ADULTS"
freq_adult <- filter(freq, speaker == "ADULTS")

# MLU: same subset for the MLU data
mlu_adult <- filter(mlu, speaker == "ADULTS")

FREQ - Boxplots and descriptives for ADULTS

TOKENS (raw)

Freq (tokens, types)

  • excluded non-target children (NTC)
  • excluded ODS during tCDS periods
  • averaged across all adult speakers within activities
  • if an activity was not present, we did not include it (i.e., we did not impute any values to create complete cases)
  • 0 values are included when the individual does not speak though we know they are present in the activity
# relabel for plots
# activity: rename the short activity codes to the labels shown on the plot axes
# language: maps capitalised labels back to lower case; the raw-count sections
#   below filter on language == "english" / "spanish".
#   NOTE(review): the levels are already lower case when the data are read in
#   (see the str(freq) output), so on a first run this is a no-op; it looks
#   like a guard for re-running this chunk after the later
#   "English"/"Spanish" recode -- confirm.
freq_adult <- freq_adult %>% 
  mutate(activity = recode(activity, "conv" = "unst. conv.", "ac" = "adult-centered",
                                     "gemods" = "non_tcds"), 
         language = recode(language, "English" = "english", "Spanish" = "spanish"))


# Boxplots (with jittered points) of raw token counts by activity for all
# speech, one panel per language stacked in a single column.
ggplot(freq_adult, aes(x = activity, y = tokens, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  facet_wrap(~ language, ncol = 1) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )

# Per-participant, per-activity means of raw tokens and types (all speech only).
# The group means are attached to every row and the rows are then collapsed to
# one per id/language/activity; the result stays grouped by id and activity.
freq_adult_act <- freq_adult %>%
  group_by(id, activity) %>%
  mutate(across(c(tokens, types), mean, .names = "{.col}_act")) %>%
  distinct(id, language, activity, tokens_act, types_act)

# language-specific subsets used for the descriptives
freq_adult_act_en <- filter(freq_adult_act, language == "english")
freq_adult_act_sp <- filter(freq_adult_act, language == "spanish")



# descriptives: per-activity summary of the per-participant mean raw token
# counts (English), returned as a single table (mat = TRUE)
describeBy(freq_adult_act_en$tokens_act, freq_adult_act_en$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n      mean        sd    median   min       max    range      skew
## X11    1          books    1 22 480.90076 272.79828 414.25000 147.0 1115.7500 968.7500 0.6058672
## X12    2           play    1 39 234.99060 132.96267 208.33333  42.5  714.0000 671.5000 1.0915721
## X13    3           food    1 31 145.78495 124.42440  89.66667  12.0  432.0000 420.0000 0.8051817
## X14    4       routines    1 32 135.71823 107.06498 106.25000   7.0  494.0000 487.0000 1.3675315
## X15    5    unst. conv.    1 43 152.50504 100.82982 114.50000  11.0  382.7500 371.7500 0.5656557
## X16    6 adult-centered    1 45  90.56519  60.38539  87.50000   4.0  295.0000 291.0000 0.9865811
## X17    7       non_tcds    1 45 154.69407  96.87180 152.00000  15.0  418.6667 403.6667 0.7289589
##       kurtosis        se
## X11 -0.7675539 58.160789
## X12  2.0768412 21.291067
## X13 -0.7341471 22.347282
## X14  2.1046767 18.926592
## X15 -0.7104771 15.376403
## X16  1.3754902  9.001722
## X17  0.1933267 14.440795
# Min/max of raw token counts per activity for English, taken over the
# individual rows of freq_adult (not the per-participant means).
# The result remains grouped by activity and language.
freq_adult_en_minmax_tokens_raw <- freq_adult %>%
  filter(language == "english") %>%
  group_by(activity, language) %>%
  mutate(min = min(tokens, na.rm = TRUE),
         max = max(tokens, na.rm = TRUE)) %>%
  distinct(activity, min, max)

freq_adult_en_minmax_tokens_raw
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 english  books             65  1474
## 2 english  play               0   743
## 3 english  unst. conv.        4   817
## 4 english  routines           7   692
## 5 english  food               1   630
## 6 english  adult-centered     0   397
## 7 english  non_tcds           0   945
# per-activity summary of the per-participant mean raw token counts (Spanish)
describeBy(freq_adult_act_sp$tokens_act, freq_adult_act_sp$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n     mean        sd   median       min      max    range       skew
## X11    1          books    1 20 436.9875 212.49434 425.9167 77.000000 768.0000 691.0000 -0.1097107
## X12    2           play    1 37 180.9775 136.02617 123.3333 25.000000 571.0000 546.0000  0.9509845
## X13    3           food    1 31 155.3602 119.25669 128.0000  6.000000 445.0000 439.0000  0.7248658
## X14    4       routines    1 35 166.9405 133.27275 135.0000  4.000000 635.5000 631.5000  1.4170829
## X15    5    unst. conv.    1 43 139.6674 123.38751 106.6000 13.000000 754.6667 741.6667  2.9611793
## X16    6 adult-centered    1 45 106.6911  89.50469  71.4000 21.500000 426.5000 405.0000  1.7694977
## X17    7       non_tcds    1 45 113.3085  82.01389 103.5000  4.166667 323.5000 319.3333  0.9357912
##       kurtosis       se
## X11 -1.0449431 47.51518
## X12  0.1647279 22.36256
## X13 -0.5873931 21.41913
## X14  2.3099708 22.52721
## X15 11.7931488 18.81642
## X16  2.6909840 13.34257
## X17  0.1726090 12.22591
# Min/max of raw token counts per activity for Spanish, taken over the
# individual rows of freq_adult (not the per-participant means).
# The result remains grouped by activity and language.
freq_adult_sp_minmax_tokens_raw <- freq_adult %>%
  filter(language == "spanish") %>%
  group_by(activity, language) %>%
  mutate(min = min(tokens, na.rm = TRUE),
         max = max(tokens, na.rm = TRUE)) %>%
  distinct(activity, min, max)

freq_adult_sp_minmax_tokens_raw
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 spanish  books             77   927
## 2 spanish  play              15   802
## 3 spanish  unst. conv.        5  1012
## 4 spanish  routines           4   937
## 5 spanish  food               6   566
## 6 spanish  adult-centered     0   566
## 7 spanish  non_tcds           0   795

TOKENS (rate per min)

# Capitalise the language labels so the facet titles read "English"/"Spanish";
# filters on freq_adult from here on use the capitalised labels.
freq_adult <- mutate(
  freq_adult,
  language = recode(language, "english" = "English", "spanish" = "Spanish")
)


# Boxplots (with jittered points) of tokens per minute by activity for all
# speech, one panel per language side by side.
ggplot(freq_adult, aes(x = activity, y = tokens_permin, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  facet_wrap(~ language) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  labs(x = "", y = "Tokens Rate") +
  theme_classic() +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7),
    panel.spacing = unit(4, "lines")
  )

# no plot argument is given, so ggsave() writes the most recent plot
ggsave(
  "./figures/boxplot_tokens_rate2.pdf",
  width = 18, height = 8, units = "in", dpi = 300
)


# Per-participant, per-activity means of the per-minute token and type rates
# (all speech only). Group means are attached to every row, then rows are
# collapsed to one per id/language/activity; the result stays grouped by id
# and activity.
freq_adult_act_permin <- freq_adult %>%
  group_by(id, activity) %>%
  mutate(
    across(c(tokens_permin, types_permin), mean, .names = "{.col}_act")
  ) %>%
  distinct(id, language, activity, tokens_permin_act, types_permin_act)

# language-specific subsets used for the descriptives
freq_adult_act_permin_en <- filter(freq_adult_act_permin, language == "English")
freq_adult_act_permin_sp <- filter(freq_adult_act_permin, language == "Spanish")

# descriptives: per-activity summary of the per-participant mean token rate
# (English)
describeBy(freq_adult_act_permin_en$tokens_permin_act, freq_adult_act_permin_en$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n     mean       sd   median       min       max     range       skew
## X11    1          books    1 22 97.05612 25.83716 99.10089 32.157566 139.35186 107.19429 -0.4652193
## X12    2           play    1 39 63.00664 24.36993 58.04526 19.809892 122.54285 102.73296  0.6362527
## X13    3           food    1 31 63.20593 33.34061 56.34873 17.818695 191.38756 173.56887  1.7407592
## X14    4       routines    1 32 70.52775 27.24599 66.26826 28.267219 157.18563 128.91841  1.1449756
## X15    5    unst. conv.    1 43 76.14884 35.36817 73.18092 10.266822 224.58716 214.32033  1.4451765
## X16    6 adult-centered    1 45 85.01633 73.45787 77.63141 19.605147 534.52116 514.91601  5.0378276
## X17    7       non_tcds    1 45 36.98633 21.17975 39.00190  4.063964  87.65849  83.59453  0.5225623
##        kurtosis        se
## X11 -0.09411375  5.508500
## X12  0.06339446  3.902312
## X13  4.60071546  5.988150
## X14  1.57756298  4.816457
## X15  5.13425782  5.393596
## X16 28.20995719 10.950453
## X17 -0.45795070  3.157290
# Min/max of tokens per minute per activity for English, taken over the
# individual rows of freq_adult (not the per-participant means).
# The result remains grouped by activity and language.
freq_adult_en_minmax_tokens_rate <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(min = min(tokens_permin, na.rm = TRUE),
         max = max(tokens_permin, na.rm = TRUE)) %>%
  distinct(activity, min, max)

freq_adult_en_minmax_tokens_rate
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 English  books          21.7   149.
## 2 English  play            0     183.
## 3 English  unst. conv.     2.93  225.
## 4 English  routines       21.4   157.
## 5 English  food           12.3   191.
## 6 English  adult-centered  0     535.
## 7 English  non_tcds        0     167.
# per-activity summary of the per-participant mean token rate (Spanish)
describeBy(freq_adult_act_permin_sp$tokens_permin_act, freq_adult_act_permin_sp$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n     mean       sd   median       min       max     range       skew
## X11    1          books    1 20 69.70156 22.42204 71.71504 33.415438 114.94578  81.53034 0.11976814
## X12    2           play    1 37 51.27875 20.41805 52.27932  8.535673 118.63334 110.09767 0.62021093
## X13    3           food    1 31 37.39510 15.81841 37.57642  9.649060  71.37418  61.72512 0.07164798
## X14    4       routines    1 35 59.05829 22.87843 58.32433 13.861479 118.88422 105.02275 0.42422784
## X15    5    unst. conv.    1 43 59.56066 23.87057 57.76884 24.971949 137.97635 113.00440 0.98462727
## X16    6 adult-centered    1 45 56.17319 33.33514 53.78916 14.119577 161.15476 147.03518 1.17089383
## X17    7       non_tcds    1 45 31.05852 17.21041 28.27339  2.157086  71.16063  69.00355 0.29725236
##       kurtosis       se
## X11 -0.8333837 5.013720
## X12  1.4100577 3.356707
## X13 -0.7574487 2.841070
## X14  0.2797878 3.867161
## X15  1.0899492 3.640229
## X16  1.4051846 4.969310
## X17 -0.8203294 2.565577
# Min/max of tokens per minute per activity for Spanish, taken over the
# individual rows of freq_adult (not the per-participant means).
# The result remains grouped by activity and language.
freq_adult_sp_minmax_tokens_rate <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(min = min(tokens_permin, na.rm = TRUE),
         max = max(tokens_permin, na.rm = TRUE)) %>%
  distinct(activity, min, max)

freq_adult_sp_minmax_tokens_rate
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 Spanish  books          33.4  115. 
## 2 Spanish  play            8.54 128. 
## 3 Spanish  unst. conv.    10.4  140. 
## 4 Spanish  routines       13.3  183. 
## 5 Spanish  food            9.65  91.1
## 6 Spanish  adult-centered  0    343. 
## 7 Spanish  non_tcds        0    153.

TYPES (raw)

# Boxplots (with jittered points) of raw type counts by activity for all
# speech, one panel per language.
ggplot(freq_adult, aes(x = activity, y = types, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  facet_wrap(~ language) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )

# descriptives: per-activity summary of the per-participant mean raw type
# counts (English)
describeBy(freq_adult_act_en$types_act, freq_adult_act_en$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n      mean       sd    median  min      max    range      skew
## X11    1          books    1 22 143.90227 66.70063 116.50000 56.0 277.7500 221.7500 0.4732497
## X12    2           play    1 39  85.90256 36.92368  81.50000 27.5 197.0000 169.5000 0.8994632
## X13    3           food    1 31  64.56989 42.01631  50.00000 12.0 177.0000 165.0000 0.6751602
## X14    4       routines    1 32  58.62083 31.98695  52.00000  7.0 144.0000 137.0000 0.6238625
## X15    5    unst. conv.    1 43  65.25039 36.72048  53.33333 10.0 163.0000 153.0000 0.6798436
## X16    6 adult-centered    1 45  44.60630 24.26759  43.40000  4.0 129.0000 125.0000 0.8473480
## X17    7       non_tcds    1 45  79.91481 44.48101  71.33333 10.5 199.8333 189.3333 0.4831218
##        kurtosis        se
## X11 -1.12778313 14.220623
## X12  1.00289109  5.912521
## X13 -0.46931520  7.546351
## X14  0.06720687  5.654547
## X15 -0.18936585  5.599821
## X16  1.60694389  3.617598
## X17 -0.42847167  6.630837
# Min/max of raw type counts per activity for English, taken over the
# individual rows of freq_adult (not the per-participant means).
# The result remains grouped by activity and language.
freq_adult_en_minmax_types_raw <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(min = min(types, na.rm = TRUE),
         max = max(types, na.rm = TRUE)) %>%
  distinct(activity, min, max)

freq_adult_en_minmax_types_raw
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 English  books             38   369
## 2 English  play               0   253
## 3 English  unst. conv.        4   267
## 4 English  routines           7   246
## 5 English  food               1   181
## 6 English  adult-centered     0   159
## 7 English  non_tcds           0   351
# per-activity summary of the per-participant mean raw type counts (Spanish)
describeBy(freq_adult_act_sp$types_act, freq_adult_act_sp$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n      mean       sd    median  min      max    range      skew
## X11    1          books    1 20 144.82500 64.74266 138.50000 44.0 268.5000 224.5000 0.2285406
## X12    2           play    1 37  62.33784 35.84100  54.66667 14.0 157.0000 143.0000 0.6215909
## X13    3           food    1 31  65.50538 36.39642  69.00000  5.0 133.0000 128.0000 0.1241942
## X14    4       routines    1 35  65.69762 33.60470  65.00000  4.0 141.5000 137.5000 0.3473994
## X15    5    unst. conv.    1 43  59.88527 32.98391  52.00000  8.0 160.6667 152.6667 0.8136958
## X16    6 adult-centered    1 45  48.42926 26.92731  40.60000 15.0 121.0000 106.0000 1.0463985
## X17    7       non_tcds    1 45  59.29000 36.12099  55.16667  3.5 137.8333 134.3333 0.5162437
##        kurtosis        se
## X11 -1.03063121 14.476900
## X12 -0.30729538  5.892225
## X13 -1.22818040  6.536989
## X14 -0.46882830  5.680232
## X15  0.38015627  5.030000
## X16  0.08898838  4.014086
## X17 -0.42162311  5.384600
# Min/max of raw type counts per activity for Spanish, taken over the
# individual rows of freq_adult (not the per-participant means).
# The result remains grouped by activity and language.
freq_adult_sp_minmax_types_raw <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(min = min(types, na.rm = TRUE),
         max = max(types, na.rm = TRUE)) %>%
  distinct(activity, min, max)

freq_adult_sp_minmax_types_raw
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 Spanish  books             44   361
## 2 Spanish  play               7   186
## 3 Spanish  unst. conv.        5   219
## 4 Spanish  routines           4   175
## 5 Spanish  food               5   166
## 6 Spanish  adult-centered     0   212
## 7 Spanish  non_tcds           0   289

TYPES (rate per min)

# Boxplots (with jittered points) of types per minute by activity for all
# speech, one panel per language.
ggplot(freq_adult, aes(x = activity, y = types_permin, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  facet_wrap(~ language) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  labs(x = "", y = "Types Rate") +
  theme_classic() +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )

# no plot argument is given, so ggsave() writes the most recent plot
ggsave(
  "./figures/boxplot_types_rate2.pdf",
  width = 18, height = 8, units = "in", dpi = 300
)


# descriptives: per-activity summary of the per-participant mean type rate
# (English)
describeBy(freq_adult_act_permin_en$types_permin_act, freq_adult_act_permin_en$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n     mean       sd   median       min       max     range      skew
## X11    1          books    1 22 32.06383  9.75467 32.15751 14.809274  48.75449  33.94522 0.1938847
## X12    2           play    1 39 27.07891 12.32556 24.04612 12.245120  66.31838  54.07326 1.5066690
## X13    3           food    1 31 39.82161 34.99837 30.28977  9.280570 191.38756 182.10699 2.7880293
## X14    4       routines    1 32 41.75257 25.57367 35.05380 14.409224 157.18563 142.77640 2.8301947
## X15    5    unst. conv.    1 43 41.83628 27.40421 36.04217  5.648173 176.14679 170.49862 2.8015041
## X16    6 adult-centered    1 45 61.14224 75.86224 46.13497 10.605640 534.52116 523.91552 5.3822148
## X17    7       non_tcds    1 45 20.66782 11.41460 20.40530  2.392211  48.90393  46.51172 0.5745082
##       kurtosis        se
## X11 -1.0590251  2.079703
## X12  2.2094453  1.973670
## X13  8.8782125  6.285893
## X14 10.1753362  4.520830
## X15 10.9712805  4.179103
## X16 30.7217214 11.308874
## X17 -0.3456499  1.701588
# Min/max of types per minute per activity for English, taken over the
# individual rows of freq_adult (not the per-participant means).
# The result remains grouped by activity and language.
freq_adult_en_minmax_types_rate <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(min = min(types_permin, na.rm = TRUE),
         max = max(types_permin, na.rm = TRUE)) %>%
  distinct(activity, min, max)

freq_adult_en_minmax_types_rate
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 English  books           9.89  64.4
## 2 English  play            0    149. 
## 3 English  unst. conv.     2.93 176. 
## 4 English  routines       11.2  157. 
## 5 English  food            8.50 191. 
## 6 English  adult-centered  0    535. 
## 7 English  non_tcds        0     92.2
# per-activity summary of the per-participant mean type rate (Spanish)
describeBy(freq_adult_act_permin_sp$types_permin_act, freq_adult_act_permin_sp$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n     mean        sd   median       min       max     range      skew
## X11    1          books    1 20 25.19438  9.378178 25.62804 10.866809  49.60044  38.73363 0.6192718
## X12    2           play    1 37 21.98845 11.038304 19.62847  4.405509  66.43467  62.02916 1.7173151
## X13    3           food    1 31 19.09226  8.249124 17.82146  8.576942  39.99831  31.42137 0.8860032
## X14    4       routines    1 35 30.57141 14.693167 28.80377  9.082726  73.61036  64.52764 1.1037506
## X15    5    unst. conv.    1 43 32.41628 16.819376 26.79962 10.061461 100.19711  90.13565 1.7190575
## X16    6 adult-centered    1 45 38.00249 28.213513 33.91582  7.901010 137.53863 129.63762 1.8238166
## X17    7       non_tcds    1 45 18.23642  9.936926 16.33417  1.666453  43.46532  41.79886 0.5847300
##        kurtosis       se
## X11  0.07872493 2.097024
## X12  4.88857397 1.814686
## X13 -0.11762887 1.481586
## X14  1.11375487 2.483598
## X15  4.05465209 2.564931
## X16  3.43928396 4.205822
## X17 -0.16215447 1.481309
# Min/max of types per minute per activity for Spanish, taken over the
# individual rows of freq_adult (not the per-participant means).
# The result remains grouped by activity and language.
freq_adult_sp_minmax_types_rate <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(min = min(types_permin, na.rm = TRUE),
         max = max(types_permin, na.rm = TRUE)) %>%
  distinct(activity, min, max)

freq_adult_sp_minmax_types_rate
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 Spanish  books          10.9   49.6
## 2 Spanish  play            4.41  71.2
## 3 Spanish  unst. conv.     6.68 100. 
## 4 Spanish  routines        8.17 171. 
## 5 Spanish  food            5.38  55.7
## 6 Spanish  adult-centered  0    343. 
## 7 Spanish  non_tcds        0    130.

MLU

MLU

  • excluded NTC
  • KEPT ODS during tCDS periods
  • averaged across all adult speakers
  • if an activity was not present, we did not include it (i.e., we did not impute any values to create complete cases)
  • we excluded observations when there were 0 number of utterances, since we cannot calculate an MLU when there are no utterances
# Relabel for plots: rename the short activity codes to their display labels
# and capitalise the language labels.
mlu_adult <- mutate(
  mlu_adult,
  activity = recode(
    activity,
    "conv" = "unst. conv.",
    "ac" = "adult-centered",
    "ods" = "non_tcds"
  ),
  language = recode(language, "english" = "English", "spanish" = "Spanish")
)


# Boxplots (with jittered points) of MLU in words by activity, one panel per
# language.
ggplot(mlu_adult, aes(x = activity, y = mlu_w, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  facet_wrap(~ language) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  labs(x = "", y = "MLUw") +
  theme_classic() +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )

# no plot argument is given, so ggsave() writes the most recent plot
ggsave(
  "./figures/boxplot_mluw2.pdf",
  width = 18, height = 8, units = "in", dpi = 300
)


# Per-participant, per-activity mean MLUw (all speech only). The group mean is
# attached to every row, then rows are collapsed to one per
# id/language/activity; the result stays grouped by id and activity.
mlu_adult_act <- mlu_adult %>%
  group_by(id, activity) %>%
  mutate(mlu_w_act = mean(mlu_w)) %>%
  distinct(id, language, activity, mlu_w_act)

# language-specific subsets used for the descriptives
mlu_adult_act_en <- filter(mlu_adult_act, language == "English")
mlu_adult_act_sp <- filter(mlu_adult_act, language == "Spanish")

# descriptives: per-activity summary of the per-participant mean MLUw (English)
describeBy(mlu_adult_act_en$mlu_w_act, mlu_adult_act_en$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n     mean        sd   median      min      max    range        skew
## X11    1          books    1 22 4.860163 1.2386474 4.849059 2.843305 7.287671 4.444366  0.38589952
## X12    2           play    1 39 3.544910 0.7799806 3.361351 2.117647 5.428571 3.310924  0.37501840
## X13    3           food    1 31 3.508170 0.8406000 3.268261 2.125000 5.328869 3.203869  0.35194344
## X14    4       routines    1 32 3.665860 0.7357554 3.610858 2.200000 5.670455 3.470455  0.54603786
## X15    5    unst. conv.    1 43 3.763843 0.8423445 3.772650 1.500000 5.769231 4.269231 -0.32572952
## X16    6 adult-centered    1 45 3.524902 0.7379635 3.407890 2.248271 5.668605 3.420334  0.87918296
## X17    7       non_tcds    1 45 4.150646 0.8011976 4.165139 2.542017 5.770249 3.228232  0.04840642
##        kurtosis        se
## X11 -0.76500796 0.2640805
## X12 -0.52053041 0.1248969
## X13 -0.75764546 0.1509762
## X14  0.06066605 0.1300644
## X15  0.41938715 0.1284563
## X16  0.83025135 0.1100091
## X17 -0.82614709 0.1194355
# Min/max of MLUw per activity for English, taken over the individual rows of
# mlu_adult (not the per-participant means).
# The result remains grouped by activity and language.
mlu_adult_en_minmax <- mlu_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(min = min(mlu_w, na.rm = TRUE),
         max = max(mlu_w, na.rm = TRUE)) %>%
  distinct(activity, min, max)

mlu_adult_en_minmax
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 English  adult-centered  1     9   
## 2 English  unst. conv.     1     6.67
## 3 English  non_tcds        1     8   
## 4 English  play            1.33  5.69
## 5 English  routines        2.2   8   
## 6 English  books           1.51  8.73
## 7 English  food            1     6.06
# per-activity summary of the per-participant mean MLUw (Spanish)
describeBy(mlu_adult_act_sp$mlu_w_act, mlu_adult_act_sp$activity, mat = TRUE, fast = TRUE)
##     item         group1 vars  n     mean        sd   median      min      max    range       skew
## X11    1          books    1 20 3.822411 1.3037314 3.758225 1.864407 6.566855 4.702448  0.4630879
## X12    2           play    1 37 2.698639 0.7463847 2.542857 1.422360 4.545918 3.123558  0.4675977
## X13    3           food    1 31 2.772200 0.6975939 2.746032 1.391304 4.230769 2.839465 -0.0686621
## X14    4       routines    1 35 3.029701 0.7381221 3.000000 1.791667 4.485947 2.694281  0.2233092
## X15    5    unst. conv.    1 43 3.054863 0.5741442 2.821722 2.004357 4.589286 2.584928  0.7193086
## X16    6 adult-centered    1 45 2.756766 0.6920525 2.682044 1.666667 5.019476 3.352810  0.8007356
## X17    7       non_tcds    1 45 3.499380 0.7220236 3.461212 1.890000 5.202889 3.312889  0.1197576
##       kurtosis         se
## X11 -0.7168780 0.29152319
## X12 -0.4561800 0.12270488
## X13 -0.5377128 0.12529156
## X14 -1.0421283 0.12476540
## X15 -0.2066920 0.08755616
## X16  0.6194120 0.10316510
## X17 -0.2901765 0.10763292
# Min/max of MLUw per activity for Spanish, taken over the individual rows of
# mlu_adult (not the per-participant means).
# The result remains grouped by activity and language.
mlu_adult_sp_minmax <- mlu_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(min = min(mlu_w, na.rm = TRUE),
         max = max(mlu_w, na.rm = TRUE)) %>%
  distinct(activity, min, max)

mlu_adult_sp_minmax
## # A tibble: 7 × 4
## # Groups:   activity, language [7]
##   language activity         min   max
##   <fct>    <fct>          <dbl> <dbl>
## 1 Spanish  books           1.86  8.36
## 2 Spanish  play            1.21  6.48
## 3 Spanish  unst. conv.     1     5.33
## 4 Spanish  routines        1.58  5   
## 5 Spanish  food            1.39  4.32
## 6 Spanish  adult-centered  1     6.22
## 7 Spanish  non_tcds        1     7.9

CHIP

Responses (RAW and PROP)

CHIP (responses, imitations/expansions; these are utterances that follow a child’s utterance, within a 5 utterance window)

  • excluded NTC
  • excluded overlapping ODS during CDS periods
  • averaged across all adult speakers
  • if an activity was not present, we did not include it (i.e., we did not impute any values to create complete cases)
  • we keep 0 values, which occur when the adults did not respond even though the child produced utterances [e.g., the numerator is 0 but the denominator is non-zero]; if a child produced no utterances, adult responses could not be calculated, so the number of observations here differs from freq and mlu
  • greater than 1 = caregiver had more utterances in response to the child; less than 1 = child had more utterances than caregiver responses
# Plotting copy of chip with display labels for activity and capitalised
# language labels; chip itself is left unchanged.
chip2 <- mutate(
  chip,
  activity = recode(activity, "conv" = "unst. conv.", "ac" = "adult-centered"),
  language = recode(language, "english" = "English", "spanish" = "Spanish")
)


# Boxplots (with jittered points) of the total number of adult responses by
# activity, one panel per language stacked in a single column.
ggplot(chip2, aes(x = activity, y = total_adult_resp, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  facet_wrap(~ language, ncol = 1) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey"
    )
  ) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )

# Boxplots (with jittered points) of adult responses as a proportion of child
# utterances by activity, one panel per language.
ggplot(
  chip2,
  aes(x = activity, y = prop_adultresp_outof_childutt, fill = activity)
) +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  # reference line at 1: caregiver and child = equal utts
  geom_hline(yintercept = 1) +
  facet_wrap(~ language) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey"
    )
  ) +
  labs(x = "", y = "Prop Responses") +
  theme_classic() +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )

# no plot argument is given, so ggsave() writes the most recent plot
ggsave(
  "./figures/boxplot_prop_resp2.pdf",
  width = 18, height = 8, units = "in", dpi = 300
)


# Per-participant, per-activity means of the two proportion measures.
# This is built from chip (not the relabelled chip2), so activity and language
# keep their original codes ("conv", "ac", "english", "spanish").
# The result stays grouped by id and activity.
chip_act <- chip %>%
  group_by(id, activity) %>%
  mutate(
    prop_resp_act = mean(prop_adultresp_outof_childutt),
    prop_imitexp_act = mean(prop_adult_imitexp_outof_childutt)
  ) %>%
  distinct(id, language, activity, prop_resp_act, prop_imitexp_act)

# language-specific subsets used for the descriptives
chip_act_en <- filter(chip_act, language == "english")
chip_act_sp <- filter(chip_act, language == "spanish")


# descriptives
# summary statistics of the participant-level mean response proportion, 
# by activity, for the English subset (returned as one matrix: mat = TRUE)
describeBy(chip_act_en$prop_resp_act, group = chip_act_en$activity, 
           mat = TRUE, fast = TRUE)
##     item   group1 vars  n     mean        sd   median       min      max    range        skew
## X11    1    books    1 22 2.378070 0.7766933 2.342175 1.1864443 4.366667 3.180222  0.69916859
## X12    2     play    1 36 2.074418 0.6357208 1.925870 1.1301724 4.666667 3.536494  1.95571413
## X13    3     food    1 29 1.914779 0.5461100 1.944444 0.8141026 3.066667 2.252564  0.14337467
## X14    4 routines    1 28 1.856904 0.5897762 1.777778 0.8000000 3.333333 2.533333  0.57225823
## X15    5     conv    1 36 1.866984 0.5430045 1.849124 0.6250000 2.933333 2.308333 -0.03709437
## X16    6       ac    1 22 1.855095 0.5401576 1.815131 0.9909502 3.248677 2.257727  0.54471173
##        kurtosis         se
## X11  0.03584900 0.16559157
## X12  5.35965871 0.10595346
## X13 -0.49085257 0.10141009
## X14 -0.07450000 0.11145723
## X15 -0.55933392 0.09050074
## X16 -0.07700799 0.11516198
# min and max of the response proportion per activity (English)
# computed on the rows of chip rather than on the participant-level means in 
# chip_act, so the range can be wider than the one reported by describeBy()
# mutate() + distinct() on the grouped data keeps the grouping variable 
# `language` in the result alongside activity, min and max
chip_adult_en_minmax_propresp <- chip %>% 
  filter(language == "english") %>% 
  group_by(activity, language) %>% 
  mutate(min = min(prop_adultresp_outof_childutt, na.rm = TRUE), 
         max = max(prop_adultresp_outof_childutt, na.rm = TRUE)) %>% 
  distinct(activity, min, max)

chip_adult_en_minmax_propresp
## # A tibble: 6 × 4
## # Groups:   activity, language [6]
##   language activity   min   max
##   <fct>    <fct>    <dbl> <dbl>
## 1 english  ac       0      5   
## 2 english  conv     0.571  5   
## 3 english  play     0.667  4.67
## 4 english  routines 0.5    3.8 
## 5 english  books    0.859  5   
## 6 english  food     0      5
# summary statistics of the participant-level mean response proportion, 
# by activity, for the Spanish subset (returned as one matrix: mat = TRUE)
describeBy(chip_act_sp$prop_resp_act, group = chip_act_sp$activity, 
           mat = TRUE, fast = TRUE)
##     item   group1 vars  n     mean        sd   median       min      max    range       skew
## X11    1    books    1 20 2.125135 0.4832411 2.109579 1.0617284 3.055556 1.993827 -0.3732188
## X12    2     play    1 31 1.927563 0.6742332 1.750000 0.8125000 3.500000 2.687500  0.5481160
## X13    3     food    1 30 1.762135 0.7100370 1.620192 0.3333333 3.314815 2.981481  0.3986445
## X14    4 routines    1 31 1.904239 0.5335071 1.900000 1.0000000 3.166667 2.166667  0.3703352
## X15    5     conv    1 35 1.818201 0.5605011 1.683208 0.8823529 3.119048 2.236695  0.6188577
## X16    6       ac    1 25 1.589711 0.6019761 1.524806 0.7083333 3.750000 3.041667  1.6448000
##       kurtosis         se
## X11 -0.2278578 0.10805599
## X12 -0.5407181 0.12109586
## X13 -0.3813870 0.12963442
## X14 -0.4500460 0.09582070
## X15 -0.4151453 0.09474197
## X16  4.0945489 0.12039523
# min and max of the response proportion per activity (Spanish)
# computed on the rows of chip rather than on the participant-level means in 
# chip_act, so the range can be wider than the one reported by describeBy()
# mutate() + distinct() on the grouped data keeps the grouping variable 
# `language` in the result alongside activity, min and max
chip_adult_sp_minmax_propresp <- chip %>% 
  filter(language == "spanish") %>% 
  group_by(activity, language) %>% 
  mutate(min = min(prop_adultresp_outof_childutt, na.rm = TRUE), 
         max = max(prop_adultresp_outof_childutt, na.rm = TRUE)) %>% 
  distinct(activity, min, max)

chip_adult_sp_minmax_propresp
## # A tibble: 6 × 4
## # Groups:   activity, language [6]
##   language activity   min   max
##   <fct>    <fct>    <dbl> <dbl>
## 1 spanish  ac       0      5   
## 2 spanish  conv     0.7    5   
## 3 spanish  play     0.765  5   
## 4 spanish  food     0.333  4   
## 5 spanish  routines 1      5   
## 6 spanish  books    1.06   3.06

Imitations/Expansions (RAW and PROP)

# plot - total adult imitations/expansions
# boxplot per activity with the raw observations jittered on top; 
# one facet per language, stacked in a single column
ggplot(chip2, aes(x = activity, y = total_adult_imitexp, fill = activity)) + 
  geom_boxplot() + 
  geom_jitter(alpha = .2) + 
  facet_wrap(~ language, ncol = 1) + 
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", 
               "dodgerblue1", "darkgoldenrod1", "darkgrey")
  ) + 
  labs(x = "", y = "") + 
  theme(
    legend.position = "none", # fill repeats the x-axis categories
    text = element_text(size = 35), 
    axis.text.x = element_text(angle = 20, hjust = .7)
  )

# plot - prop of adult imitations/expansions out of child utt
# boxplot per activity with jittered raw observations, faceted by language
ggplot(chip2, aes(x = activity, y = prop_adult_imitexp_outof_childutt, fill = activity)) + 
  geom_boxplot() + 
  geom_jitter(alpha = .3) + 
  geom_hline(yintercept = 1) + # caregiver and child = equal utts
  facet_wrap(~ language) + 
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", 
               "dodgerblue1", "darkgoldenrod1", "darkgrey")
  ) + 
  labs(x = "", y = "Prop Imitations/Expansions") + 
  theme_classic() + 
  theme(
    legend.position = "none", 
    text = element_text(size = 35), 
    axis.text.x = element_text(angle = 20, hjust = .7)
  )

# save the plot created above (the most recent ggplot) as a PDF
ggsave(
  filename = "./figures/boxplot_prop_imitexp2.pdf", 
  plot = last_plot(), 
  width = 18, height = 8, units = "in", dpi = 300
)


# descriptives
# summary statistics of the participant-level mean imitation/expansion 
# proportion, by activity, for the English subset (one matrix: mat = TRUE)
describeBy(chip_act_en$prop_imitexp_act, group = chip_act_en$activity, 
           mat = TRUE, fast = TRUE)
##     item   group1 vars  n      mean        sd    median        min       max     range        skew
## X11    1    books    1 22 0.4822402 0.2032150 0.4606055 0.07142857 0.8000000 0.7285714 -0.17741053
## X12    2     play    1 36 0.4082746 0.2249434 0.4011154 0.00000000 1.1157407 1.1157407  0.96845872
## X13    3     food    1 29 0.3716799 0.1958653 0.3809524 0.00000000 1.0000000 1.0000000  0.91498868
## X14    4 routines    1 28 0.3527558 0.1764344 0.3737981 0.00000000 0.6746032 0.6746032 -0.08511947
## X15    5     conv    1 36 0.3490170 0.1925299 0.3014535 0.00000000 0.8888889 0.8888889  0.68222826
## X16    6       ac    1 22 0.3648964 0.1674221 0.3302305 0.18972991 0.8666667 0.6769368  1.33927429
##       kurtosis         se
## X11 -0.8102586 0.04332558
## X12  1.4668965 0.03749056
## X13  1.7868059 0.03637128
## X14 -0.7390449 0.03334296
## X15  0.1562880 0.03208831
## X16  1.3695376 0.03569452
# min and max of the imitation/expansion proportion per activity (English)
# computed on the rows of chip rather than on the participant-level means in 
# chip_act, so the range can be wider than the one reported by describeBy()
# mutate() + distinct() on the grouped data keeps the grouping variable 
# `language` in the result alongside activity, min and max
chip_adult_en_minmax_propimitexp <- chip %>% 
  filter(language == "english") %>% 
  group_by(activity, language) %>% 
  mutate(min = min(prop_adult_imitexp_outof_childutt, na.rm = TRUE), 
         max = max(prop_adult_imitexp_outof_childutt, na.rm = TRUE)) %>% 
  distinct(activity, min, max)

chip_adult_en_minmax_propimitexp
## # A tibble: 6 × 4
## # Groups:   activity, language [6]
##   language activity    min   max
##   <fct>    <fct>     <dbl> <dbl>
## 1 english  ac       0       4   
## 2 english  conv     0       2   
## 3 english  play     0       1.38
## 4 english  routines 0       1   
## 5 english  books    0.0714  1   
## 6 english  food     0       1
# summary statistics of the participant-level mean imitation/expansion 
# proportion, by activity, for the Spanish subset (one matrix: mat = TRUE)
describeBy(chip_act_sp$prop_imitexp_act, group = chip_act_sp$activity, 
           mat = TRUE, fast = TRUE)
##     item   group1 vars  n      mean        sd    median    min       max     range      skew
## X11    1    books    1 20 0.4052845 0.2414756 0.3428358 0.0625 1.0000000 0.9375000 0.7095279
## X12    2     play    1 31 0.3536352 0.2224772 0.3333333 0.0000 1.1700000 1.1700000 1.4354164
## X13    3     food    1 30 0.3333968 0.2401772 0.3088362 0.0000 1.0000000 1.0000000 0.9717588
## X14    4 routines    1 31 0.3949755 0.2920670 0.3562162 0.0000 1.0322636 1.0322636 0.5266545
## X15    5     conv    1 35 0.3856342 0.2314362 0.3640005 0.0000 0.9523810 0.9523810 0.7019027
## X16    6       ac    1 25 0.3015461 0.1634934 0.2982069 0.0000 0.7603067 0.7603067 0.8202077
##        kurtosis         se
## X11 -0.32529803 0.05399559
## X12  3.62662516 0.03995808
## X13  0.58774011 0.04385016
## X14 -0.74778343 0.05245678
## X15 -0.07136292 0.03911986
## X16  0.67007692 0.03269868
# min and max of the imitation/expansion proportion per activity (Spanish)
# computed on the rows of chip rather than on the participant-level means in 
# chip_act, so the range can be wider than the one reported by describeBy()
# mutate() + distinct() on the grouped data keeps the grouping variable 
# `language` in the result alongside activity, min and max
chip_adult_sp_minmax_propimitexp <- chip %>% 
  filter(language == "spanish") %>% 
  group_by(activity, language) %>% 
  mutate(min = min(prop_adult_imitexp_outof_childutt, na.rm = TRUE), 
         max = max(prop_adult_imitexp_outof_childutt, na.rm = TRUE)) %>% 
  distinct(activity, min, max)

chip_adult_sp_minmax_propimitexp
## # A tibble: 6 × 4
## # Groups:   activity, language [6]
##   language activity   min   max
##   <fct>    <fct>    <dbl> <dbl>
## 1 spanish  ac           0  1.67
## 2 spanish  conv         0  2   
## 3 spanish  play         0  2   
## 4 spanish  food         0  1   
## 5 spanish  routines     0  3   
## 6 spanish  books        0  1

Number of Speakers

# speaker counts were extracted using speaker roles from the FREQ output
# in earlier code, n_speakers was calculated as the number of speakers for a speaker role (child, adults) during the respective activity and segment num
# kwalods rows are removed because these are overlapping utterances during tCDS activities

# keep one row per id x activity x segment for the adult role, bin the number 
# of adult speakers (1 vs. more than 1), then attach two counts to every row:
#   n_instances_by_activity = rows per activity
#   n_by_bin                = rows per activity x bin
# the result stays grouped by activity and bin_n_adult_speakers
num_speakers <- read_csv("./data_demo_lena_transcripts/num_speakers.csv") %>% 
  filter(Speaker2 == "ADULTS", activity != "kwalods") %>% 
  distinct(id, activity, segment_num, language, n_speakers) %>% 
  mutate(bin_n_adult_speakers = ifelse(n_speakers > 1, "more_than1", "1_adult")) %>% 
  add_count(activity, name = "n_instances_by_activity") %>% 
  group_by(activity, bin_n_adult_speakers) %>% 
  mutate(n_by_bin = n())


# one row per activity x bin with the proportion of that activity's instances 
# falling into the bin (1 adult vs. more than 1 adult)
ungroup(num_speakers) %>% 
  distinct(activity, bin_n_adult_speakers, n_by_bin, n_instances_by_activity) %>% 
  mutate(prop = n_by_bin / n_instances_by_activity)
## # A tibble: 14 × 5
##    activity bin_n_adult_speakers n_by_bin n_instances_by_activity  prop
##    <chr>    <chr>                   <int>                   <int> <dbl>
##  1 books    1_adult                    67                      84 0.798
##  2 books    more_than1                 17                      84 0.202
##  3 play     1_adult                   133                     181 0.735
##  4 play     more_than1                 48                     181 0.265
##  5 conv     1_adult                   157                     254 0.618
##  6 conv     more_than1                 97                     254 0.382
##  7 routines 1_adult                    91                     121 0.752
##  8 routines more_than1                 30                     121 0.248
##  9 food     1_adult                    74                     115 0.643
## 10 food     more_than1                 41                     115 0.357
## 11 ac       1_adult                   215                     363 0.592
## 12 ac       more_than1                148                     363 0.408
## 13 gemods   1_adult                   259                     536 0.483
## 14 gemods   more_than1                277                     536 0.517